{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "id": "baa28a74-f874-42e4-a57d-b48a0af5b229",
   "metadata": {},
   "outputs": [],
   "source": [
    "# pip install pytube moviepy openai-whisper"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "id": "1f12058d-29cd-4d37-90ad-f0440436a971",
   "metadata": {},
   "outputs": [],
   "source": [
    "# !pip install you-get\n",
    "# !pip install yt-dlp"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "id": "5101bc6a-f847-4f53-bbc9-e8b5b6a3155a",
   "metadata": {},
   "outputs": [],
   "source": [
    "import pytube\n",
    "import moviepy.editor as mp\n",
    "import whisper\n",
    "import os\n",
    "import subprocess\n",
    "\n",
    "\n",
    "# Download a video with yt-dlp\n",
    "def download_video(url, output_path='video.mp4'):\n",
    "    \"\"\"Download the video at ``url`` to ``output_path`` using the yt-dlp CLI.\n",
    "\n",
    "    Args:\n",
    "        url: Address of the video page to download.\n",
    "        output_path: Value passed to yt-dlp's ``-o`` option.\n",
    "\n",
    "    Returns:\n",
    "        ``output_path``, unchanged.\n",
    "\n",
    "    Raises:\n",
    "        subprocess.CalledProcessError: If yt-dlp exits with a non-zero status.\n",
    "        FileNotFoundError: If the ``yt-dlp`` executable cannot be found.\n",
    "    \"\"\"\n",
    "    # check=True surfaces a failed download here instead of letting a later\n",
    "    # step fail on a missing file; '--' stops a URL that starts with '-' from\n",
    "    # being parsed as a yt-dlp option.\n",
    "    subprocess.run(['yt-dlp', '-o', output_path, '--', url], check=True)\n",
    "    return output_path\n",
    "\n",
    "# Extract the audio track from a video\n",
    "def extract_audio(video_path, output_path='audio.wav'):\n",
    "    \"\"\"Write the audio track of ``video_path`` to ``output_path``.\n",
    "\n",
    "    Args:\n",
    "        video_path: Path of the video file to read.\n",
    "        output_path: Path of the audio file to write.\n",
    "\n",
    "    Returns:\n",
    "        ``output_path``, unchanged.\n",
    "\n",
    "    Raises:\n",
    "        ValueError: If the video has no audio track.\n",
    "    \"\"\"\n",
    "    video = mp.VideoFileClip(video_path)\n",
    "    try:\n",
    "        if video.audio is None:\n",
    "            raise ValueError(f'no audio track found in {video_path!r}')\n",
    "        video.audio.write_audiofile(output_path)\n",
    "    finally:\n",
    "        # Release the file readers held by the clip even if writing fails.\n",
    "        video.close()\n",
    "    return output_path\n",
    "\n",
    "# Transcribe audio with Whisper\n",
    "def transcribe_audio_with_whisper(audio_path, model_name=\"large\", language=None):\n",
    "    \"\"\"Transcribe ``audio_path`` with a Whisper model and return the text.\n",
    "\n",
    "    Args:\n",
    "        audio_path: Path of the audio file to transcribe.\n",
    "        model_name: Name of the Whisper model to load; defaults to \"large\".\n",
    "        language: Optional language code (e.g. 'en') forwarded to\n",
    "            ``model.transcribe``. When None, no language is passed.\n",
    "\n",
    "    Returns:\n",
    "        The ``'text'`` entry of the transcription result.\n",
    "    \"\"\"\n",
    "    model = whisper.load_model(model_name)\n",
    "    # Only forward ``language`` when the caller set it, so the default call is\n",
    "    # exactly ``model.transcribe(audio_path)``.\n",
    "    options = {} if language is None else {'language': language}\n",
    "    result = model.transcribe(audio_path, **options)\n",
    "    return result['text']"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "id": "6912b1ae-ed9d-4947-8e7b-f16ccdbddfeb",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Downloading video...\n",
      "[BiliBili] Extracting URL: https://www.bilibili.com/video/BV12J4m1N7wU/\n",
      "[BiliBili] 12J4m1N7wU: Downloading webpage\n",
      "[BiliBili] BV12J4m1N7wU: Extracting videos in anthology\n",
      "[BiliBili] 1254605812: Extracting chapters\n",
      "[BiliBili] Format(s) 1080P 高码率, 1080P 高清, 720P 高清 are missing; you have to login or become premium member to download them. Use --cookies-from-browser or --cookies for the authentication. See  https://github.com/yt-dlp/yt-dlp/wiki/FAQ#how-do-i-pass-cookies-to-yt-dlp  for how to manually pass cookies\n",
      "[info] BV12J4m1N7wU: Downloading 1 format(s): 100110+30280\n",
      "[download] Destination: ./video.f100110.mp4\n",
      "[download] 100% of  425.01KiB in 00:00:00 at 7.64MiB/s     \n",
      "[download] Destination: ./video.f30280.m4a\n",
      "[download] 100% of  274.45KiB in 00:00:00 at 10.31MiB/s    \n",
      "[Merger] Merging formats into \"./video.mp4\"\n",
      "Deleting original file ./video.f30280.m4a (pass -k to keep)\n",
      "Deleting original file ./video.f100110.mp4 (pass -k to keep)\n",
      "Extracting audio...\n",
      "MoviePy - Writing audio in ./audio.wav\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "                                                                    \r"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "MoviePy - Done.\n",
      "Transcribing audio with Whisper...\n",
      "Transcription:\n",
      " Saying goodbye, I don't think is ever nice, but saying goodbye without Feeling sad or feeling hurt or whatever that would that would just mean that the time we spent together was Right great and had a great time. So it was always clear that it will be tough and I know it will be tough\n"
     ]
    }
   ],
   "source": [
    "\n",
    "# Input URL and the intermediate files produced along the way\n",
    "video_url = 'https://www.bilibili.com/video/BV12J4m1N7wU/'\n",
    "video_path = './video.mp4'\n",
    "audio_path = './audio.wav'\n",
    "\n",
    "# Step 1: fetch the video\n",
    "print(\"Downloading video...\")\n",
    "download_video(video_url, output_path=video_path)\n",
    "\n",
    "# Step 2: pull the audio track out of the downloaded file\n",
    "print(\"Extracting audio...\")\n",
    "extract_audio(video_path, output_path=audio_path)\n",
    "\n",
    "# Step 3: run speech recognition on the extracted audio\n",
    "print(\"Transcribing audio with Whisper...\")\n",
    "transcription = transcribe_audio_with_whisper(audio_path)\n",
    "\n",
    "# Header and transcript are printed on separate lines\n",
    "print(\"Transcription:\", transcription, sep='\\n')\n",
    "\n",
    "# Clean up temporary files\n",
    "# os.remove(video_path)\n",
    "# os.remove(audio_path)\n"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "21bbd3ef-11f5-4238-b128-e626a60d9fed",
   "metadata": {},
   "source": [
    "### whisper"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "a38dddf6-8349-415a-863f-e46a4e8d3b22",
   "metadata": {},
   "source": [
    "- `--task`\n",
    "    - transcribe：转录\n",
    "    - translate：翻译（将语音翻译为英文文本）"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "311847fb-838b-4cc9-8446-42e0e1979f53",
   "metadata": {},
   "source": [
    "### whisperx"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "74aa0570-2c77-49d6-94ef-660b19e67db2",
   "metadata": {},
   "source": [
    "- https://github.com/m-bain/whisperX"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "f37ccfab-b771-476a-8eb2-03d01d0fd831",
   "metadata": {},
   "source": [
    "- 说话人分离（--diarize）\n",
    "    - 注意一些 hf 的 models，也许需要在 hf 页面做申请，比如 `pyannote/segmentation-3.0`\n",
    "    - 也许需要通过 `--hf_token` 参数传入 Hugging Face 访问令牌（或在环境变量中配置 `hf_token`）\n",
    "- srt file：（--highlight_words）"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.10.13"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
